VINTRA

import numpy as np
import pandas
import pandas as pd
from itables import init_notebook_mode
import os
import subprocess

# Switch itables into notebook mode with all_interactive=True.
init_notebook_mode(all_interactive=True)
working_directory = f'{os.getcwd()}/../data/gtfs/vintra/'
gtfs_files_directory = f'{working_directory}/gtfs-files/'

# One dict per validated archive: {'failas': <archive name>, <member file>: <status>}.
# Rows are collected in a list and turned into a DataFrame once at the end,
# because DataFrame.append was removed in pandas 2.0 and copies on every call.
gtfs_file_stats_rows = []

for file in sorted(os.listdir(gtfs_files_directory)):
    if not file.endswith('.zip'):
        continue

    filename, _, _ = file.partition('.zip')

    # Run the validator from working_directory so the relative jar, input and
    # output paths resolve. The command is passed as an argument list (no
    # shell), so archive names containing spaces or shell metacharacters are
    # passed through verbatim. subprocess.run kills the child if the timeout
    # expires before re-raising TimeoutExpired.
    completed = subprocess.run(
        [
            'java', '-jar', 'gtfs-validator-301.jar',
            '-i', f'gtfs-files/{file}',
            '-o', 'reports',
            '-v', f'{filename}_report.json',
            '-e', f'{filename}_system_errors.json',
            '-n',
            '-c', 'lt',
        ],
        cwd=working_directory,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        timeout=60,
    )
    out, err, errcode = completed.stdout, completed.stderr, completed.returncode

    # Only the text after the first 'seconds\n' marker in stdout is parsed;
    # each line there is read as '<member file>\t<status>'.
    _, _, gtfs_files_txt = out.decode("utf-8").partition('seconds\n')

    gtfs_files_dict = {'failas': filename}
    for gtfs_file_rep in gtfs_files_txt.splitlines():
        gtfs_file, separator, c = gtfs_file_rep.partition('\t')
        if not separator:
            # Blank or free-form output line: nothing to record.
            continue
        gtfs_files_dict[gtfs_file] = c if c != 'MISSING_FILE' else None

    gtfs_file_stats_rows.append(gtfs_files_dict)

gtfs_file_stats_df = pd.DataFrame(gtfs_file_stats_rows)

# Fixed column layout: the archive name first, then one column per GTFS
# member file. reindex() creates any listed column that is absent (filled
# with NaN) and drops any column that is not listed.
stats_column_order = [
    'failas',
    'agency.txt',
    'calendar.txt',
    'calendar_dates.txt',
    'routes.txt',
    'shapes.txt',
    'stop_times.txt',
    'stops.txt',
    'trips.txt',
    'fare_attributes.txt',
    'fare_rules.txt',
    'attributions.txt',
    'feed_info.txt',
    'frequencies.txt',
    'levels.txt',
    'pathways.txt',
    'transfers.txt',
    'translations.txt',
]
gtfs_file_stats_df = gtfs_file_stats_df.reindex(columns=stats_column_order)
# The archive name becomes the row label.
gtfs_file_stats_df = gtfs_file_stats_df.set_index('failas')


# Columns whose missing values are marked with '❌'; gaps in every other
# column are marked with '⚠️' by the second fill below.
cross_marked_columns = [
    'agency.txt',
    'calendar.txt',
    'calendar_dates.txt',
    'routes.txt',
    'shapes.txt',
    'stop_times.txt',
    'stops.txt',
    'trips.txt',
    'fare_attributes.txt',
    'fare_rules.txt',
]
cross_marked = gtfs_file_stats_df[cross_marked_columns].fillna('❌')
gtfs_file_stats_df[cross_marked_columns] = cross_marked

gtfs_file_stats_df = gtfs_file_stats_df.fillna('⚠️')
# NOTE(review): the Styler built on the next line is neither assigned nor
# displayed, so it does not affect the table shown below.
gtfs_file_stats_df.style.set_sticky(axis="index")

gtfs_file_stats_df
agency.txt calendar.txt calendar_dates.txt routes.txt shapes.txt stop_times.txt stops.txt trips.txt fare_attributes.txt fare_rules.txt attributions.txt feed_info.txt frequencies.txt levels.txt pathways.txt transfers.txt translations.txt
Loading... (need help?)
import json

reports_dir = f'{working_directory}/reports/'

# Suffix shared by the file filter and the name extraction, so both agree on
# which files count as validation reports.
report_suffix = '_report.json'

# One row per notice entry of every report. Rows are gathered in a list and
# converted once, because DataFrame.append was removed in pandas 2.0.
gtfs_notice_rows = []

for file in sorted(os.listdir(reports_dir)):
    if not file.endswith(report_suffix):
        continue

    gtfs_filename = file[:-len(report_suffix)]

    # JSON is read as UTF-8 explicitly rather than with the platform default.
    with open(os.path.join(reports_dir, file), encoding='utf-8') as fp:
        data = json.load(fp)

    for notice in data['notices']:
        gtfs_notice_rows.append({
            'failas': gtfs_filename,
            'klaida': notice['code'],
            'sunkumas': notice['severity'],
            'viso': notice['totalNotices'],
        })

# Explicit columns keep the frame well-formed (and the 'viso' lookup below
# valid) even when no report contained any notice.
gtfs_notices_df = pd.DataFrame(
    gtfs_notice_rows,
    columns=['failas', 'klaida', 'sunkumas', 'viso'],
)

gtfs_notices_df['viso'] = pd.to_numeric(gtfs_notices_df['viso'], downcast='integer')

GTFS patikrinimas

GTFS patikrinimo klaidos

def show_notices_table_by_severity(severity: str) -> "pd.io.formats.style.Styler":
    """Build a styled pivot of notice counts for one severity level.

    Rows of the module-level ``gtfs_notices_df`` whose ``'sunkumas'`` value
    equals *severity* are pivoted into a table with one row per ``'failas'``
    and one column per ``'klaida'``, holding the summed ``'viso'`` counts
    (0 where a combination is absent) plus ``All`` margin totals.

    Args:
        severity: Value compared against the ``'sunkumas'`` column,
            e.g. ``'ERROR'`` or ``'WARNING'``.

    Returns:
        A pandas Styler over the pivot table, with sticky index labels and
        an orange background on every cell greater than zero.
    """
    matching_notices = gtfs_notices_df[gtfs_notices_df['sunkumas'] == severity].drop(columns=['sunkumas'])

    counts = matching_notices.pivot_table(
        index='failas',
        columns='klaida',
        values='viso',
        aggfunc='sum',
        margins=True,
        fill_value=0,
    )

    # Every access to `.style` creates a fresh Styler, so set_sticky has to be
    # chained onto the same Styler that is returned; calling it on a separate
    # `.style` access has no effect on the result.
    return (
        counts.style
        .set_sticky(axis="index")
        .apply(lambda row: ["background: orange" if v > 0 else '' for v in row], axis=1)
    )

# Display the per-file pivot of validator notices whose severity is 'ERROR'.
show_notices_table_by_severity('ERROR')
klaida decreasing_or_equal_stop_time_distance duplicate_fare_rule_zone_id_fields equal_shape_distance_diff_coordinates missing_required_file All
failas          
AnyksciuR 0 0 1 0 1
Birstono 0 0 0 5 5
BirzuR 0 0 0 4 4
Druskininku 0 0 0 4 4
IgnalinosR 0 0 5 0 5
JonavosR 0 0 1 0 1
JoniskioR 0 0 1 0 1
JurbarkoR 0 0 2 0 2
Kalvarijos 0 0 0 4 4
KaunoM 0 0 0 4 4
Kazlurudos 0 0 0 4 4
KedainiuR 0 0 17 0 17
KelmesR 0 0 13417 0 13417
KlaipedosR 0 0 13417 0 13417
KupiskioR 0 0 0 4 4
LTSAR 0 0 10317 0 10317
LazdijuR 0 0 0 4 4
PakruojoR 0 0 0 4 4
PanevezioM 0 0 0 4 4
PanevezioR 0 0 0 4 4
PasvalioR 0 0 13416 0 13416
PrienuR 0 0 0 4 4
RadviliskioR 0 0 1 0 1
RaseiniuR 0 0 1 0 1
RokiskioR 0 0 1 0 1
SilutesR 0 0 7 0 7
SkuodoR 0 0 1 0 1
TrakuR 0 0 1 0 1
UkmergesR 0 0 1 0 1
UtenosR 0 0 1 0 1
VarenosR 0 0 1 0 1
ZarasuR 0 0 1 0 1
birstono-gtfs 0 0 0 5 5
google_transit 0 0 6 0 6
gtfs_all 8 2368 10327 0 12703
All 8 2368 60943 54 63373

GTFS patikrinimo įspėjimai

# Display the per-file pivot of validator notices whose severity is 'WARNING'.
show_notices_table_by_severity('WARNING')
klaida duplicate_route_name equal_shape_distance_same_coordinates fast_travel_between_consecutive_stops fast_travel_between_far_stops leading_or_trailing_whitespaces missing_timepoint_column missing_timepoint_value same_route_and_agency_url stop_too_far_from_shape stop_too_far_from_shape_using_user_distance stops_match_shape_out_of_order unexpected_enum_value All
failas                          
AlytausM 0 0 0 0 0 0 5744 0 0 0 0 0 5744
AlytausR 0 3 0 0 0 0 2885 0 0 0 0 0 2888
AnyksciuR 0 0 0 0 0 0 2627 0 2 0 0 0 2629
Birstono 0 0 0 0 0 1 0 0 0 0 0 0 1
BirzuR 0 0 0 0 0 1 0 0 0 0 0 0 1
Druskininku 0 0 0 0 0 1 0 0 0 0 0 0 1
Elektrenu 0 0 0 0 0 0 434 0 0 0 0 0 434
IgnalinosR 0 0 0 0 0 0 1608 0 5 0 0 0 1613
JonavosR 0 0 0 0 1 0 6866 0 3 0 0 0 6870
JoniskioR 0 4 0 0 0 0 880 0 0 0 0 0 884
JurbarkoR 0 0 0 0 2 0 941 0 1 0 0 0 944
Kalvarijos 0 0 0 0 0 1 0 0 0 0 0 0 1
KaunoM 0 0 0 0 0 1 0 0 0 0 0 0 1
KaunoR 0 0 0 0 0 0 13005 0 0 0 0 0 13005
Kazlurudos 0 0 0 0 0 1 0 0 0 0 0 0 1
KedainiuR 0 1 0 0 0 0 4685 0 2 0 1 0 4689
KelmesR 0 5536 8 3 3 0 19387 0 9 0 0 0 24946
KiasiadoriuR 0 0 0 0 0 0 46 0 0 0 0 0 46
KlaipedosM 0 0 0 0 0 0 217 0 0 0 0 0 217
KlaipedosR 0 5536 8 3 3 0 19387 0 9 0 0 0 24946
KretingosR 0 1 0 0 0 0 2748 0 0 0 0 0 2749
KupiskioR 0 0 0 0 0 1 0 0 0 0 0 0 1
LTSAR 0 4270 8 3 4 0 16101 0 7 0 0 0 20393
LazdijuR 0 0 0 0 0 1 0 0 0 0 0 0 1
Marijampoles 0 2 0 0 0 0 3996 0 0 0 0 0 3998
MazeikiuR 0 2 0 0 0 0 1909 0 3 0 0 0 1914
MoletuR 0 15 0 0 1 0 919 0 0 0 0 0 935
PagegiuR 0 0 0 0 0 0 94 0 0 0 0 0 94
PakruojoR 0 0 0 0 0 1 0 0 0 0 0 0 1
PalangosM 0 0 0 0 0 0 160 0 0 0 0 0 160
PanevezioM 0 0 0 0 0 1 0 0 0 0 0 0 1
PanevezioR 0 0 0 0 0 1 0 0 0 0 0 0 1
PasvalioR 0 5543 8 3 3 0 19531 0 7 0 0 0 25095
PlungesR 0 0 0 0 0 0 2687 0 1 0 0 0 2688
PrienuR 0 0 0 0 0 1 0 0 0 0 0 0 1
RadviliskioR 0 13 0 0 0 0 4077 0 0 0 0 0 4090
RaseiniuR 0 0 0 0 0 0 787 0 0 0 0 0 787
Rietavo 0 0 0 0 0 0 24 0 0 0 0 0 24
RokiskioR 0 3 0 0 0 0 2685 0 0 0 0 0 2688
SalcininkuR 0 0 0 0 0 0 1027 0 0 0 0 0 1027
SiauliuM 0 0 0 0 0 0 256 0 0 0 0 0 256
SiauliuR 0 10 0 0 0 0 2257 0 0 0 0 0 2267
SilalesR 0 2 0 0 0 0 367 0 0 0 0 0 369
SilutesR 0 0 0 0 0 0 1240 0 0 0 0 0 1240
SkuodoR 0 1 0 0 1 0 978 0 0 0 0 0 980
SvencioniuR 0 2 0 0 0 0 1962 0 0 0 0 0 1964
TauragesR 0 0 0 0 0 0 2082 0 1 0 0 0 2083
Taurages_R 0 0 0 0 0 0 2082 0 1 0 0 0 2083
TrakuR 0 23 0 0 1 0 2436 0 2 0 0 0 2462
UkmergesR 0 2 0 0 5 0 4968 0 0 0 0 0 4975
UtenosR 0 7 0 0 0 0 3900 0 0 0 0 0 3907
VarenosR 0 48 0 0 0 0 1967 0 0 0 0 0 2015
VilkaviskioR 0 0 1 1 0 0 1400 0 3 0 1 0 1406
VilniausM 0 0 0 0 0 0 477375 0 0 0 0 17 477392
VilniausR 0 0 0 0 0 0 870 0 0 0 0 0 870
VisaginoM 0 0 0 0 0 0 116 0 0 0 0 0 116
ZarasuR 0 7 0 0 0 0 1332 0 2 0 0 0 1341
akmene 0 46 0 0 0 0 1322 0 0 0 0 0 1368
birstono-gtfs 0 0 0 0 0 1 0 0 0 0 0 0 1
google_transit 0 7 0 0 0 0 2940 0 7 0 0 0 2954
gtfs_all 1328 4325 340 97 0 0 61318 1923 56 1657 0 1 71045
neringa 0 0 0 0 0 0 329 0 0 0 0 0 329
All 1328 25409 373 110 24 13 706954 1923 121 1657 2 18 737932

Stotelės

from zipfile import ZipFile
import gtfs_functions as gtfs
import plotly.express as px

# Gather stops.txt from every archive except 'gtfs_all.zip', tagging each row
# with the archive it came from. Frames are collected in a list and
# concatenated once instead of growing a DataFrame inside the loop.
stop_frames = []
for file in sorted(os.listdir(gtfs_files_directory)):
    if not file.endswith('.zip') or file == 'gtfs_all.zip':
        continue

    filename, _, _ = file.partition('.zip')

    with ZipFile(os.path.join(gtfs_files_directory, file)) as gtfs_zip:
        if "stops.txt" not in gtfs_zip.namelist():
            continue

        # Read the member while the archive is still open and close the
        # member handle as soon as the CSV has been parsed.
        with gtfs_zip.open("stops.txt") as stops_csv:
            stops_df = pd.read_csv(stops_csv)

    stops_df['failas'] = filename
    stop_frames.append(stops_df)

# pd.concat raises on an empty list, so fall back to an empty frame when no
# archive provided a stops.txt.
all_stops = pd.concat(stop_frames) if stop_frames else pd.DataFrame()

# Read the Mapbox token through a context manager so the file handle is
# closed, and drop surrounding whitespace (such as a trailing newline) that
# is not part of the token.
with open("../.mapbox_token") as token_file:
    mapbox_access_token = token_file.read().strip()
px.set_mapbox_access_token(mapbox_access_token)

# Scatter map of every collected stop, coloured by the archive ('failas')
# the stop was read from.
fig = px.scatter_mapbox(
    data_frame=all_stops,
    lat='stop_lat',
    lon='stop_lon',
    mapbox_style="light",
    zoom=6,
    title='Stotelės',
    hover_name='stop_name',
    color='failas',
)

# Attribution layer plus zeroed right/left/bottom margins, applied in a
# single layout update.
fig.update_layout(
    mapbox_layers=[
        {
            "sourceattribution": '© <a href="https://judumas.vycius.lt" target="_blank">Karolis Vyčius</a> © <a href="https://www.visimarsrutai.lt/gtfs/" target="_blank">Visimarsrutai.lt</a>'
        }
    ],
    margin={"r":0,"l":0,"b":0},
)
fig.show()
# Load stops.txt from google_transit.zip. Both the archive and the member
# handle are closed as soon as the CSV is parsed; the figure is then built
# from the in-memory frame.
with ZipFile(os.path.join(gtfs_files_directory, 'google_transit.zip')) as gtfs_zip:
    with gtfs_zip.open("stops.txt") as stops_csv:
        google_transit_vintra_stops_df = pd.read_csv(stops_csv)

fig = px.scatter_mapbox(
    data_frame=google_transit_vintra_stops_df,
    lat='stop_lat',
    lon='stop_lon',
    mapbox_style="light",
    zoom=6,
    title='Google Maps stotelės iš Vintra',
    hover_name='stop_name',
)

# Attribution layer plus zeroed right/left/bottom margins.
fig.update_layout(
    mapbox_layers=[
        {
            "sourceattribution": '© <a href="https://judumas.vycius.lt" target="_blank">Karolis Vyčius</a> © <a href="https://www.visimarsrutai.lt/gtfs/" target="_blank">Visimarsrutai.lt</a>'
        }
    ])
fig.update_layout(margin={"r":0,"l":0,"b":0})
fig.show()